
************************************************************************************************
************************************************************************************************
*This .do file creates the main analysis data file used for the project

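*Inputs:
*1. NCERDC student files from /data/Student/MBuild for 2008-2017:
*   mb_YYYY_pub.dta for 2008-2012 and pcaudit_pubYYYY.dta for 2013-2017

*Output:
*1. "student_mb_2009to2017.dta" (saved in /data_analysis/NC_RD_Retake; 2008 is used only for lagged scores and is dropped)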

************************************************************************************************
************************************************************************************************

*-----------------------------------------------------------------------------------------------
*2008 BASELINE YEAR
*Only the 2008 scores are needed, as lagged scores for 2009 (no retest in 2008).
*Result is one record per mastid with lea, schlcode, grade, read, math and year.
*-----------------------------------------------------------------------------------------------
clear all
set more off
use "/data/Student/MBuild/mb_2008_pub.dta"

*Grade filter: drop grade 2, grades 9-99 and missing grade; also drop missing student ids
destring grade, replace ignore("NUS")
drop if grade==. | grade==2 | (grade>=9 & grade<=99)
drop if mastid==.

*Common names for scores and test ids
rename ma_score math
rename rd_score read
rename ma_test_id math_test_id
rename rd_test_id read_test_id
destring read math, replace ignore("NUL")

******NCEXTEND (DISABLED) TESTS******
*Flag is 1 for ids made of X1/X2/CL + subject letter + one of 0,3,4,5,6,7,8,
*0 for the regular ids (MA03-MA08 or RD03-RD08), and missing for any other id.
generate nc_extend_math = 1 if strlen(math_test_id)==4 & inlist(substr(math_test_id,1,3),"X1M","X2M","CLM") & inlist(substr(math_test_id,4,1),"0","3","4","5","6","7","8")
replace nc_extend_math = 0 if inlist(math_test_id,"MA03","MA04","MA05","MA06","MA07","MA08")
generate nc_extend_read = 1 if strlen(read_test_id)==4 & inlist(substr(read_test_id,1,3),"X1R","X2R","CLR") & inlist(substr(read_test_id,4,1),"0","3","4","5","6","7","8")
replace nc_extend_read = 0 if inlist(read_test_id,"RD03","RD04","RD05","RD06","RD07","RD08")

*Blank out NCExtend scores and error scores (200 or below) in one pass per subject
replace math = . if nc_extend_math==1 | math<=200
replace read = . if nc_extend_read==1 | read<=200

*Students who took NCExtend in both subjects are dropped; NCExtend in one subject only is kept
drop if nc_extend_math==1 & nc_extend_read==1

*Keep only records with at least one usable score
keep if math!=. | read!=.

*DEALING WITH DUPLICATES*
*For each variable, count the distinct non-missing values within mastid (missing sorts last).
*One distinct value: copy it to every record of the student.
*Two or more distinct values: cannot tell which is right, so set to missing.
*All missing: left as missing.
foreach v in math read grade {
    quietly bysort mastid (`v'): generate nuniq = (`v'!=.) if _n==1
    quietly by mastid: replace nuniq = nuniq[_n-1] + ((`v'!=`v'[_n-1]) & `v'!=.) if _n>1
    quietly by mastid: replace nuniq = nuniq[_N]
    by mastid: replace `v' = `v'[1] if nuniq==1
    quietly replace `v' = . if nuniq>=2
    drop nuniq
}
duplicates drop mastid, force

*No covariates or test dates are needed for 2008
keep mastid lea schlcode grade math read

generate year = 2008

compress
save "/data_analysis/NC_RD_Retake/student_level_2008_temp.dta", replace


**************************************************************************************************************************************************************************************************
**************************************************************************************************************************************************************************************************
********************************************************RETEST YEARS:2009-2012********************************************************************************************************************
**************************************************************************************************************************************************************************************************
**************************************************************************************************************************************************************************************************
*Retest years 2009-2012: for each year, load the file, clean regular and retake scores,
*collapse to one record per mastid, harmonize covariates and test dates, and save a temp file.
clear all

foreach y of numlist 2009(1)2012 {
    clear all
    use "/data/Student/MBuild/mb_`y'_pub.dta"
    display "Year is `y'"

    *Drop test variables that are not used (2010-2012; nothing is dropped in 2009).
    *The drop list is generated: every prefix (subject, subject_rg, subject_r1, plus the
    *writing prefix wr which has no rg or r1 variants) is combined with the year-specific
    *score or date stems and the five common fields.
    local triple
    local single
    local stems
    if `y'==2010 {
        local triple sc a1 a2 bi ci e1 gm ps us
        local single wr
        local stems cscoreN
    }
    else if `y'==2011 {
        local triple sc a1 a2 bi ci e1 ps us
        local single wr
        local stems test_dt scoreN
    }
    else if `y'==2012 {
        local triple sc a1 bi e1
        local stems test_dt scoreN cscoreN
    }
    local prefixes `single'
    foreach s of local triple {
        local prefixes `prefixes' `s' `s'_rg `s'_r1
    }
    local droplist
    foreach p of local prefixes {
        foreach f in `stems' test_id test_school exemption_code ach_level accomm {
            local droplist `droplist' `p'_`f'
        }
    }
    if "`prefixes'"!="" {
        drop `droplist'
    }

    *Grade filter: drop grade 2, grades 9-99 and missing grade; also drop missing student ids
    qui destring grade, replace i("NUS")
    qui drop if (grade>=9 & grade<=99) | grade==2 | grade==.
    qui drop if mastid==.

    *Common names for regular (rg) and retake (r1) scores and test ids
    if `y'==2009 {
        ren rd_rg_score read
        ren ma_rg_score math
        ren rd_r1_score read_retake
        ren ma_r1_score math_retake
        qui destring read math read_retake math_retake, replace i("NUL")
        *NOTE(review): in 2009 the single variable rd_test_id is used as the reading test id
        *for both the regular test and the retake - confirm there are no separate rg and r1 ids
        ren rd_test_id read_test_id
        ren ma_rg_test_id math_test_id
        gen read_retake_id=read_test_id
        ren ma_r1_test_id math_retake_id
    }
    else if `y'>=2010 {
        ren rd_rg_scoreN read
        ren ma_rg_scoreN math
        ren rd_r1_scoreN read_retake
        ren ma_r1_scoreN math_retake
        qui destring read math read_retake math_retake, replace i("NUL")
        ren rd_rg_test_id read_test_id
        ren ma_rg_test_id math_test_id
        ren rd_r1_test_id read_retake_id
        ren ma_r1_test_id math_retake_id
    }

    ******NCEXTEND (DISABLED) TESTS******
    *Each flag is 1 for ids made of X1/X2/CL + subject letter + one of 0,3,4,5,6,7,8,
    *0 for the regular ids (MA03-MA08 or RD03-RD08), and missing for any other id.
    *Regular tests
    qui gen nc_extend_math=1 if strlen(math_test_id)==4 & inlist(substr(math_test_id,1,3),"X1M","X2M","CLM") & inlist(substr(math_test_id,4,1),"0","3","4","5","6","7","8")
    qui replace nc_extend_math=0 if inlist(math_test_id,"MA03","MA04","MA05","MA06","MA07","MA08")
    qui gen nc_extend_read=1 if strlen(read_test_id)==4 & inlist(substr(read_test_id,1,3),"X1R","X2R","CLR") & inlist(substr(read_test_id,4,1),"0","3","4","5","6","7","8")
    qui replace nc_extend_read=0 if inlist(read_test_id,"RD03","RD04","RD05","RD06","RD07","RD08")

    qui replace math=. if nc_extend_math==1
    qui replace read=. if nc_extend_read==1

    *Same thing for retakes
    qui gen nc_extend_math_retake=1 if strlen(math_retake_id)==4 & inlist(substr(math_retake_id,1,3),"X1M","X2M","CLM") & inlist(substr(math_retake_id,4,1),"0","3","4","5","6","7","8")
    qui replace nc_extend_math_retake=0 if inlist(math_retake_id,"MA03","MA04","MA05","MA06","MA07","MA08")
    qui gen nc_extend_read_retake=1 if strlen(read_retake_id)==4 & inlist(substr(read_retake_id,1,3),"X1R","X2R","CLR") & inlist(substr(read_retake_id,4,1),"0","3","4","5","6","7","8")
    qui replace nc_extend_read_retake=0 if inlist(read_retake_id,"RD03","RD04","RD05","RD06","RD07","RD08")

    qui replace math_retake=. if nc_extend_math_retake==1
    qui replace read_retake=. if nc_extend_read_retake==1

    *Drop the NCExtend students (if in both subjects on the regular test)
    qui drop if nc_extend_math==1 & nc_extend_read==1

    *Make sure no error scores (200 or below)*
    *Each score is tested against itself. The retake lines previously tested the regular
    *score, which had just been set to missing, so retake error scores were never removed.
    qui replace math=. if math<=200
    qui replace read=. if read<=200
    qui replace math_retake=. if math_retake<=200
    qui replace read_retake=. if read_retake<=200

    *Drop if missing scores on all dimensions
    qui drop if math==. & math_retake==. & read==. & read_retake==.

    *DEALING WITH DUPLICATES*
    *For each variable, grprank counts the distinct non-missing values within mastid
    *(data sorted so missing comes last) and maxrank holds the final count for the student.
    *NOTE(review): duplicates drop keeps the first record per mastid, so variables not
    *harmonized here come from whichever duplicate is first in the current sort order.
    qui gen grprank = .
    foreach var in math read math_retake read_retake grade {
        qui sort mastid `var'
        qui replace grprank = 0
        qui by mastid: replace grprank=1 if _n==1 & `var'!=.
        qui by mastid: replace grprank =grprank[_n-1] + ((`var' != `var'[_n-1]) & `var'!=.) if _n>1
        qui by mastid: gen maxrank = grprank[_N]
        *If there is only one unique occurrence, assign this value to all records
        by mastid: replace `var' = `var'[1] if maxrank==1
        *If all missing, leave as missing
        *If more than one non-missing value, cannot know which to take, so mark as missing
        qui replace `var'=. if maxrank>=2
        drop maxrank
    }
    duplicates drop mastid, force

    *Fix covariates (e.g., make them consistent across years)*
    ***Ethnicity***
    *2009-2010 use four-letter codes that are mapped to one-letter codes;
    *from 2011 on only code P is folded into A.
    qui ren ethnicity ethnic
    if `y'<=2010 {
        qui replace ethnic="I" if ethnic=="AMIN"
        qui replace ethnic="A" if ethnic=="ASIA"
        qui replace ethnic="H" if ethnic=="HISP"
        qui replace ethnic="B" if ethnic=="BLCK"
        qui replace ethnic="W" if ethnic=="WHTE"
        qui replace ethnic="M" if ethnic=="MULT"
    }
    else {
        qui replace ethnic="A" if ethnic=="P"
    }
    *Sex: 1 if M, 0 if F, missing otherwise*
    qui ren sex sex1
    qui gen sex=1 if sex1=="M"
    qui replace sex=0 if sex1=="F"
    *LEP: 1 if Y or 1, 0 if N or U, missing otherwise*
    qui ren lep lep1
    qui gen lep=1 if lep1=="Y" | lep1=="1"
    qui replace lep=0 if lep1=="N" | lep1=="U"
    *SWD: rebuilt from ec_code - 1 for any code other than NULL or empty, 0 for NULL*
    qui drop swd
    qui gen swd=1 if ec_code!="NULL" & ec_code!=""
    qui replace swd=0 if ec_code=="NULL"
    *EDS: 1 if Y, 0 if N, missing otherwise*
    qui ren eds eds1
    qui gen eds=1 if eds1=="Y"
    qui replace eds=0 if eds1=="N"
    ***GIFTED***
    qui gen aig_math=1 if aig_m=="Y"
    qui replace aig_math=0 if aig_m=="N"
    qui gen aig_read=1 if aig_r=="Y"
    qui replace aig_read=0 if aig_r=="N"

    *Test dates
    if `y'==2009 {
        ren rd_rg_test_date read_reg_date
        ren rd_r1_test_date read_retest_date
        ren ma_rg_test_date math_reg_date
        ren ma_r1_test_date math_retest_date
        *Make test dates non-string to match later years:
        *characters 1-4 are read as year, 5-6 as month and 7-8 as day
        foreach var of varlist read_reg_date read_retest_date math_reg_date math_retest_date {
            gen year_`var'=substr(`var',1,4)
            gen month_`var'=substr(`var',5,2)
            gen day_`var'=substr(`var',7,2)
            drop `var'
            destring month_`var' day_`var' year_`var', replace i("NUL")
            gen `var'=mdy(month_`var',day_`var',year_`var')
            format `var' %td
            drop month_`var' day_`var' year_`var'
        }
    }
    else if `y'>=2010 {
        ren rd_rg_test_dt read_reg_date
        ren rd_r1_test_dt read_retest_date
        ren ma_rg_test_dt math_reg_date
        ren ma_r1_test_dt math_retest_date
    }

    keep mastid lea schlcode math read math_retake read_retake grade ethnic sex lep swd eds aig_read aig_math read_reg_date read_retest_date math_reg_date math_retest_date nc_extend_math nc_extend_read nc_extend_math_retake nc_extend_read_retake

    gen year=`y'

    compress
    save "/data_analysis/NC_RD_Retake/student_level_`y'_temp.dta", replace
}


**************************************************************************************************************************************************************************************************
**************************************************************************************************************************************************************************************************
***************************************************************2013-2017**************************************************************************************************************************
**************************************************************************************************************************************************************************************************
**************************************************************************************************************************************************************************************************

*No retests in these years, just grabbing them for future test score outcomes*
*2013-2017: one temp file per year with scores and covariates (no retake variables)
clear all
foreach y of numlist 2013(1)2017 {
    display "Year is `y'"
    clear all
    use "/data/Student/MBuild/pcaudit_pub`y'.dta"

    *Grade filter: drop grade 2, grades 9-99 and missing grade (keeps grades 3-8);
    *also drop missing student ids
    destring grade, replace ignore("S")
    drop if grade==. | grade==2 | (grade>=9 & grade<=99)
    drop if mastid==.

    rename pc_ma_score math
    rename pc_rd_score read

    ******NCEXTEND (DISABLED) TESTS******
    *Flags are 0/1 here: 1 when the test type is X1 or X2, 0 for anything else
    generate nc_extend_math = inlist(pc_ma_type,"X1","X2")
    generate nc_extend_read = inlist(pc_rd_type,"X1","X2")

    *Blank out NCExtend scores and error scores (200 or below) in one pass per subject
    quietly replace math = . if nc_extend_math==1 | math<=200
    quietly replace read = . if nc_extend_read==1 | read<=200

    *Students who took NCExtend in both subjects are dropped
    drop if nc_extend_math==1 & nc_extend_read==1

    *DEALING WITH DUPLICATES*
    *For each score, count the distinct non-missing values within mastid (missing sorts last).
    *One distinct value: copy it to every record of the student.
    *Two or more distinct values: cannot tell which is right, so set to missing.
    *All missing: left as missing.
    foreach v in math read {
        quietly bysort mastid (`v'): generate nuniq = (`v'!=.) if _n==1
        quietly by mastid: replace nuniq = nuniq[_n-1] + ((`v'!=`v'[_n-1]) & `v'!=.) if _n>1
        quietly by mastid: replace nuniq = nuniq[_N]
        by mastid: replace `v' = `v'[1] if nuniq==1
        quietly replace `v' = . if nuniq>=2
        drop nuniq
    }
    duplicates drop mastid, force

    *Fix covariates*
    ***Ethnicity: fold code P into A***
    quietly replace ethnic = "A" if ethnic=="P"

    *Sex: 1 if M, 0 if F, missing otherwise*
    quietly rename sex sex1
    quietly generate sex = (sex1=="M") if inlist(sex1,"M","F")

    *LEP: 1 if Y or 1, 0 if N or U, missing otherwise*
    quietly rename lep lep1
    quietly generate lep = inlist(lep1,"Y","1") if inlist(lep1,"Y","1","N","U")

    *SWD: in 2017 recode the Y/N/U string; before 2017 rebuild it from ec_code*
    if `y'==2017 {
        generate swd1 = (swd=="Y") if inlist(swd,"Y","N","U")
        drop swd
        rename swd1 swd
    }
    else {
        quietly drop swd
        quietly generate swd = (ec_code!="NULL") if ec_code!=""
    }

    *EDS: 1 if Y, 0 if N, missing otherwise*
    quietly rename eds eds1
    quietly generate eds = (eds1=="Y") if inlist(eds1,"Y","N")

    ***GIFTED***
    *M is math only, R is reading only, B counts for both, N for neither;
    *code I counts as gifted in both subjects from 2014 on and is left missing in 2013
    quietly generate aig_math = 1 if inlist(aig,"M","B") | (aig=="I" & `y'>=2014)
    quietly replace aig_math = 0 if inlist(aig,"N","R")
    quietly generate aig_read = 1 if inlist(aig,"R","B") | (aig=="I" & `y'>=2014)
    quietly replace aig_read = 0 if inlist(aig,"N","M")

    keep mastid lea schlcode grade math read ethnic sex lep swd eds aig_math aig_read nc_extend_math nc_extend_read

    generate year = `y'

    compress
    save "/data_analysis/NC_RD_Retake/student_level_`y'_temp.dta", replace
}




**************************************************************************************************************************************************************************************************
**************************************************************************************************************************************************************************************************
***************************************************NOW COMBINE ALL YEARS AND STANDARDIZE TEST SCORES**********************************************************************************************
**************************************************************************************************************************************************************************************************
**************************************************************************************************************************************************************************************************

*First bring in the retest years and normalize scores by distance to retest cutoff so that test score in these years is the running variable*
*Stack the retest years first, so the running variables are only defined for 2009-2012
clear all
foreach y of numlist 2009(1)2012 {
    append using "/data_analysis/NC_RD_Retake/student_level_`y'_temp.dta"
}

*Normalize test scores by distance to retest cutoff
*Note: these values are from the NC Department of Public Instruction website.  Unfortunately they changed their website in 2019, but these cutoffs can still be found via the Wayback machine.
*For instance, the math cutoffs are here: https://web.archive.org/web/20110228075237/http://www.dpi.state.nc.us/accountability/testing/shared/achievelevel/matheog
*The retested flags use the full variable names math_retest_date and read_retest_date.
*The previous short forms math_retest and read_retest only resolved to these variables
*through variable abbreviation, so the results are unchanged and the script no longer
*depends on the varabbrev setting.
*NOTE(review): the flags are based on the retest DATE being non-missing, not on the
*retake score (math_retake, read_retake) - confirm this is the intended definition.
gen retested_m=(math_retest_date!=.)
gen running_math=math-338.5 if grade==3
replace running_math=math-344.5 if grade==4
replace running_math=math-350.5 if grade==5
replace running_math=math-351.5 if grade==6
replace running_math=math-354.5 if grade==7
replace running_math=math-356.5 if grade==8
gen retested_r=(read_retest_date!=.)
gen running_read=read-337.5 if grade==3
replace running_read=read-342.5 if grade==4
replace running_read=read-348.5 if grade==5
replace running_read=read-350.5 if grade==6
replace running_read=read-355.5 if grade==7
replace running_read=read-357.5 if grade==8

*Add 2008
append using "/data_analysis/NC_RD_Retake/student_level_2008_temp.dta"

*Add 2013-2017
foreach y of numlist 2013(1)2017 {
    append using "/data_analysis/NC_RD_Retake/student_level_`y'_temp.dta"
}

*Turn the string ethnicity code into a labeled numeric variable with the same name
encode ethnic, gen(ethnic1)
drop ethnic
ren ethnic1 ethnic

****CREATE STANDARDIZED MATH AND READING SCORES****
*z-scores within each year-by-grade cell (grades 3-8, years 2008-2017)
local subject "math read"
gen math_stan=.
gen read_stan=.
foreach var of local subject {
    foreach t of numlist 2008(1)2017 {
        foreach g of numlist 3(1)8 {
            qui su `var' if grade==`g' & year==`t'
            qui replace `var'_stan=(`var'-r(mean))/r(sd) if grade==`g' & year==`t'
        }
    }
}
ren math_stan smathscal
ren read_stan sreadscal

***Get future and lagged scores and repeat/skip indicators***
*Declare the student-by-year panel once; all F. and L. operators below rely on it
xtset mastid year
gen Fsmathscal=F.smathscal
gen F2smathscal=F2.smathscal
gen F3smathscal=F3.smathscal
gen F4smathscal=F4.smathscal
gen Fsreadscal=F.sreadscal
gen F2sreadscal=F2.sreadscal
gen F3sreadscal=F3.sreadscal
gen F4sreadscal=F4.sreadscal

*Lagged test scores (this is the only use of the 2008 records)
gen lag_smathscal=L.smathscal
gen lag_sreadscal=L.sreadscal

*repeating: 1 if same grade as the previous year, 0 if one grade higher, missing otherwise
gen repeating=0 if (grade-1)==L.grade
replace repeating=1 if grade==L.grade
*repeat_next_year: the value of repeating in the following year
gen repeat_next_year=F.repeating

*Create unique schoolid and charter dummy
egen schoolid=group(lea schlcode)
gen charter=(schlcode=="000")

*2008 no longer needed (only used for lagged scores)
drop if year==2008

*Variable order for the final file (mastid is listed once)
order mastid year grade lea schlcode schoolid math running_math retested_m math_retake read running_read retested_r read_retake sex lep swd eds aig_math aig_read repeating repeat_next_year ethnic charter lag_smathscal smathscal Fsmathscal F2smathscal F3smathscal F4smathscal lag_sreadscal sreadscal Fsreadscal F2sreadscal F3sreadscal F4sreadscal math_reg_date math_retest_date read_reg_date read_retest_date nc_extend_math nc_extend_math_retake nc_extend_read nc_extend_read_retake

compress
save "/data_analysis/NC_RD_Retake/student_mb_2009to2017.dta", replace

*Remove the per-year temp files now that the combined file is saved
foreach y of numlist 2008(1)2017 {
    erase "/data_analysis/NC_RD_Retake/student_level_`y'_temp.dta"
}









